###############################################
#TABLE A1
###############################################

library(dplyr)
library(xtable)

# Load cleaned data ----
# One CSV per dataset; the paths are relative to the current working directory.

data1 <- utils::read.csv(file = "./data/data1.csv")
data2 <- utils::read.csv(file = "./data/data2.csv")
data3 <- utils::read.csv(file = "./data/data3.csv")

# Balance for data1 ----
#
# Each pre-treatment covariate is used, one at a time, as the only predictor
# of the treatment indicator `trt`. glm() is called without a `family`
# argument, so its default (gaussian) family is used.

# Fit balance regressions
b1  <- glm(trt ~ age, data = data1)
b2  <- glm(trt ~ factor(education), data = data1)
b3  <- glm(trt ~ lr_1, data = data1)
b4a <- glm(trt ~ natid_covar1_1, data = data1)
b4b <- glm(trt ~ natid_covar2_1, data = data1)
b5  <- glm(trt ~ female, data = data1)
b6  <- glm(trt ~ income, data = data1)
b7  <- glm(trt ~ factor(Current.UK.area.of.residence), data = data1)
b8  <- glm(trt ~ factor(UK.area.of.birth), data = data1)
b9  <- glm(trt ~ factor(Employment.Status), data = data1)

# Extract estimate (column 1) and p-value (column 4) from each model's summary
# coefficient table. Both numbers are taken from the same table so they stay
# aligned row-for-row: `model$coefficients` also lists aliased (NA) terms,
# which the summary table omits.
#   b1-b6: only the first non-intercept row (row 2) is reported; for the
#          education factor that is its first non-reference level only.
#   b7-b9: every non-intercept row is reported.
bal_stats <- rbind(
  coef(summary(b1))[2, c(1, 4), drop = FALSE],
  coef(summary(b2))[2, c(1, 4), drop = FALSE],
  coef(summary(b3))[2, c(1, 4), drop = FALSE],
  coef(summary(b4a))[2, c(1, 4), drop = FALSE],
  coef(summary(b4b))[2, c(1, 4), drop = FALSE],
  coef(summary(b5))[2, c(1, 4), drop = FALSE],
  coef(summary(b6))[2, c(1, 4), drop = FALSE],
  coef(summary(b7))[-1, c(1, 4), drop = FALSE],
  coef(summary(b8))[-1, c(1, 4), drop = FALSE],
  coef(summary(b9))[-1, c(1, 4), drop = FALSE]
)

# Named numeric vectors (names are the model term names).
coefs <- bal_stats[, 1]
ps    <- bal_stats[, 2]

# Display labels, in the same order as the rows of `bal_stats`.
Covariates <- c(
  "Age",
  "Education (Low)",
  "Left-Right Self-Placement",
  "Pre-Treatment National Identification 1",
  "Pre-Treatment National Identification 2",
  "Female",
  "Income (Ordinal)",
  "Residence (East Midlands)", "Residence (East England)",
  "Residence (London)", "Residence (North-East England)",
  "Residence (North-West England)", "Residence (Northern Ireland)",
  "Residence (Scotland)", "Residence (South-East England)",
  "Residence (South-West England)", "Residence (Wales)",
  "Residence (West Midlands)", "Residence (Yorkshire & The Humber)",
  "Birthplace (Revoked)", "Birthplace (East Midlands)",
  "Birthplace (East England)", "Birthplace (London)",
  "Birthplace (North-East England)", "Birthplace (North-West England)",
  "Birthplace (Scotland)", "Birthplace (South-East England)",
  "Birthplace (South-West England)", "Birthplace (Wales)",
  "Birthplace (West Midlands)", "Birthplace (Yorkshire & The Humber)",
  "Employment Status Expired", "New Job", "Full-Time Work",
  "Not in Paid Work", "Other Work", "Part-Time Work", "Unemployed"
)

# Fail loudly if the hand-written labels do not line up with the fitted terms
# (e.g. a factor level is absent from this dataset) rather than producing a
# mislabelled table.
if (length(Covariates) != nrow(bal_stats)) {
  stop(
    "data1 balance table: ", length(Covariates), " covariate labels but ",
    nrow(bal_stats), " reported coefficients; check the factor levels.",
    call. = FALSE
  )
}

# Build balance table: one row per reported term, values rounded to three
# significant digits. Built directly (no character round-trip), so the numeric
# columns do not depend on the stringsAsFactors default of the R version.
bal <- data.frame(
  Covariate   = Covariates,
  Coefficient = signif(unname(coefs), 3),
  `P-Value`   = signif(unname(ps), 3),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

bala  <- bal
bala1 <- bala[1:6, ]   # first six covariates only (Age through Female)


# Balance for data2 ----
#
# Each pre-treatment covariate is used, one at a time, as the only predictor
# of the treatment indicator `trt`. glm() is called without a `family`
# argument, so its default (gaussian) family is used.

b1  <- glm(trt ~ age, data = data2)
b2  <- glm(trt ~ factor(education), data = data2)
b3  <- glm(trt ~ lr_1, data = data2)
b4a <- glm(trt ~ natid_covar1_1, data = data2)
b4b <- glm(trt ~ natid_covar2_1, data = data2)
b5  <- glm(trt ~ female, data = data2)
b6  <- glm(trt ~ income, data = data2)
b7  <- glm(trt ~ factor(Current.UK.area.of.residence), data = data2)
b8  <- glm(trt ~ factor(UK.area.of.birth), data = data2)
b9  <- glm(trt ~ factor(Employment.Status), data = data2)

# Extract estimate (column 1) and p-value (column 4) from each model's summary
# coefficient table. Both numbers are taken from the same table so they stay
# aligned row-for-row: `model$coefficients` also lists aliased (NA) terms,
# which the summary table omits.
#   b1-b6: only the first non-intercept row (row 2) is reported; for the
#          education factor that is its first non-reference level only.
#   b7-b9: every non-intercept row is reported.
bal_stats <- rbind(
  coef(summary(b1))[2, c(1, 4), drop = FALSE],
  coef(summary(b2))[2, c(1, 4), drop = FALSE],
  coef(summary(b3))[2, c(1, 4), drop = FALSE],
  coef(summary(b4a))[2, c(1, 4), drop = FALSE],
  coef(summary(b4b))[2, c(1, 4), drop = FALSE],
  coef(summary(b5))[2, c(1, 4), drop = FALSE],
  coef(summary(b6))[2, c(1, 4), drop = FALSE],
  coef(summary(b7))[-1, c(1, 4), drop = FALSE],
  coef(summary(b8))[-1, c(1, 4), drop = FALSE],
  coef(summary(b9))[-1, c(1, 4), drop = FALSE]
)

# Named numeric vectors (names are the model term names).
coefs <- bal_stats[, 1]
ps    <- bal_stats[, 2]

# Display labels, in the same order as the rows of `bal_stats`.
Covariates <- c(
  "Age",
  "Education (Low)",
  "Left-Right Self-Placement",
  "Pre-Treatment National Identification 1",
  "Pre-Treatment National Identification 2",
  "Female",
  "Income (Ordinal)",
  "Residence (East Midlands)", "Residence (East England)",
  "Residence (London)", "Residence (North-East England)",
  "Residence (North-West England)", "Residence (Northern Ireland)",
  "Residence (Scotland)", "Residence (South-East England)",
  "Residence (South-West England)", "Residence (Wales)",
  "Residence (West Midlands)", "Residence (Yorkshire & The Humber)",
  "Birthplace (Revoked)", "Birthplace (East Midlands)",
  "Birthplace (East England)", "Birthplace (London)",
  "Birthplace (North-East England)", "Birthplace (North-West England)",
  "Birthplace (Scotland)", "Birthplace (South-East England)",
  "Birthplace (South-West England)", "Birthplace (Wales)",
  "Birthplace (West Midlands)", "Birthplace (Yorkshire & The Humber)",
  "Employment Status Expired", "New Job", "Full-Time Work",
  "Not in Paid Work", "Other Work", "Part-Time Work", "Unemployed"
)

# Fail loudly if the hand-written labels do not line up with the fitted terms
# (e.g. a factor level is absent from this dataset) rather than producing a
# mislabelled table.
if (length(Covariates) != nrow(bal_stats)) {
  stop(
    "data2 balance table: ", length(Covariates), " covariate labels but ",
    nrow(bal_stats), " reported coefficients; check the factor levels.",
    call. = FALSE
  )
}

# Build balance table: one row per reported term, values rounded to three
# significant digits. Built directly (no character round-trip), so the numeric
# columns do not depend on the stringsAsFactors default of the R version.
bal <- data.frame(
  Covariate   = Covariates,
  Coefficient = signif(unname(coefs), 3),
  `P-Value`   = signif(unname(ps), 3),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

balb  <- bal
balb1 <- balb[1:6, ]   # first six covariates only (Age through Female)


# Balance for data3 ----
#
# Each pre-treatment covariate is used, one at a time, as the only predictor
# of the treatment indicator `trt`. glm() is called without a `family`
# argument, so its default (gaussian) family is used.

b1  <- glm(trt ~ age, data = data3)
b2  <- glm(trt ~ factor(education), data = data3)
b3  <- glm(trt ~ lr_1, data = data3)
b4a <- glm(trt ~ natid_covar1_1, data = data3)
b4b <- glm(trt ~ natid_covar2_1, data = data3)
b5  <- glm(trt ~ female, data = data3)
b6  <- glm(trt ~ income, data = data3)
b7  <- glm(trt ~ factor(Current.UK.area.of.residence), data = data3)
b8  <- glm(trt ~ factor(UK.area.of.birth), data = data3)
b9  <- glm(trt ~ factor(Employment.Status), data = data3)

# Extract estimate (column 1) and p-value (column 4) from each model's summary
# coefficient table. Both numbers are taken from the same table so they stay
# aligned row-for-row: `model$coefficients` also lists aliased (NA) terms,
# which the summary table omits.
#   b1-b6: only the first non-intercept row (row 2) is reported; for the
#          education factor that is its first non-reference level only.
#   b7-b9: every non-intercept row is reported.
bal_stats <- rbind(
  coef(summary(b1))[2, c(1, 4), drop = FALSE],
  coef(summary(b2))[2, c(1, 4), drop = FALSE],
  coef(summary(b3))[2, c(1, 4), drop = FALSE],
  coef(summary(b4a))[2, c(1, 4), drop = FALSE],
  coef(summary(b4b))[2, c(1, 4), drop = FALSE],
  coef(summary(b5))[2, c(1, 4), drop = FALSE],
  coef(summary(b6))[2, c(1, 4), drop = FALSE],
  coef(summary(b7))[-1, c(1, 4), drop = FALSE],
  coef(summary(b8))[-1, c(1, 4), drop = FALSE],
  coef(summary(b9))[-1, c(1, 4), drop = FALSE]
)

# Named numeric vectors (names are the model term names).
coefs <- bal_stats[, 1]
ps    <- bal_stats[, 2]

# Display labels, in the same order as the rows of `bal_stats`.
Covariates <- c(
  "Age",
  "Education (Low)",
  "Left-Right Self-Placement",
  "Pre-Treatment National Identification 1",
  "Pre-Treatment National Identification 2",
  "Female",
  "Income (Ordinal)",
  "Residence (East England)", "Residence (London)",
  "Residence (North-East England)", "Residence (North-West England)",
  "Residence (Scotland)", "Residence (South-East England)",
  "Residence (South-West England)", "Residence (Wales)",
  "Residence (West Midlands)", "Residence (Yorkshire & The Humber)",
  "Birthplace (East Midlands)", "Birthplace (East England)",
  "Birthplace (London)", "Birthplace (North-East England)",
  "Birthplace (North-West England)", "Birthplace (Scotland)",
  "Birthplace (South-East England)", "Birthplace (South-West England)",
  "Birthplace (Wales)", "Birthplace (West Midlands)",
  "Birthplace (Yorkshire & The Humber)",
  "New Job", "Full-Time Work", "Not in Paid Work", "Other Work",
  "Part-Time Work", "Unemployed"
)

# Fail loudly if the hand-written labels do not line up with the fitted terms
# (e.g. a factor level is absent from this dataset) rather than producing a
# mislabelled table.
if (length(Covariates) != nrow(bal_stats)) {
  stop(
    "data3 balance table: ", length(Covariates), " covariate labels but ",
    nrow(bal_stats), " reported coefficients; check the factor levels.",
    call. = FALSE
  )
}

# Build balance table: one row per reported term, values rounded to three
# significant digits. Built directly (no character round-trip), so the numeric
# columns do not depend on the stringsAsFactors default of the R version.
bal <- data.frame(
  Covariate   = Covariates,
  Coefficient = signif(unname(coefs), 3),
  `P-Value`   = signif(unname(ps), 3),
  check.names = FALSE,
  stringsAsFactors = FALSE
)

balc  <- bal
balc1 <- balc[1:6, ]   # first six covariates only (Age through Female)


# Combined balance table ----
#
# Start from the data1 table and attach the data2 and data3 results by
# covariate label. Left joins keep exactly the rows of `bala`: a covariate
# absent from `balb`/`balc` gets NA there, and one absent from `bala` is
# dropped. All three inputs share the names Coefficient and P-Value, so the
# first join suffixes them with .x (bala) and .y (balb); the columns coming
# from `balc` no longer clash and keep their plain names.

bal_com <- dplyr::left_join(
  dplyr::left_join(bala, balb, by = "Covariate", suffix = c(".x", ".y")),
  balc,
  by = "Covariate",
  suffix = c(".x", ".y")
)

# Short version: first six rows only.
bal_com1 <- bal_com[seq_len(6), ]


# Export to LaTeX ----
#
# Both tables are printed as LaTeX to standard output. `include.rownames` is
# an option of print.xtable(), not of xtable() (which ignores extra
# arguments), so it is supplied to print() only.
# NOTE(review): no `digits` is given, so xtable's default number of decimal
# places applies to the numeric columns; small values rounded to 3 significant
# digits upstream may therefore print as e.g. 0.00 - confirm this is intended.

xtable::xtable(bal_com1, caption = "Balance Test") %>%
  print(include.rownames = FALSE)

rownames(bal_com) <- NULL

xtable::xtable(bal_com, caption = "Full Balance Test") %>%
  print(include.rownames = FALSE)
